# -*- coding: utf-8 -*-
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#作者：cacho_37967865
#博客：https://blog.csdn.net/sinat_37967865
#文件：tencent_ocr.py
#日期：2019-10-15
#备注：Python利用腾讯云文字识别技术进行文字识别
pip install tencentcloud-sdk-python，先要去腾讯申请appid，secret_id，secret_key
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
import json
import os

from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models

from base_model import encode_base64

# Tencent Cloud API credentials.
# Prefer the environment variables TENCENTCLOUD_SECRET_ID / TENCENTCLOUD_SECRET_KEY
# so that real secrets do not have to live in the source tree.
# SECURITY NOTE(review): the literal fallbacks below are credentials that were
# committed to source control. They are kept only so existing setups keep
# working; they should be rotated and the fallbacks removed.
secret_id = os.environ.get('TENCENTCLOUD_SECRET_ID') or 'AKIDPbYFj7FExQfeNsqNHvKUP4XO2Rdankxy'
secret_key = os.environ.get('TENCENTCLOUD_SECRET_KEY') or '0lGZdRzINK3XnnY5MPL2VyGK8h2FExRg'


# Build the shared OCR client once, at import time.
# `client` is pre-bound to None so the name always exists: if the SDK raises
# during setup, callers can test `client is None` instead of hitting a
# NameError that hides the real cause.
client = None
try:
    cred = credential.Credential(secret_id, secret_key)

    # HTTP settings: send requests to the OCR service endpoint.
    httpProfile = HttpProfile()
    httpProfile.endpoint = "ocr.tencentcloudapi.com"

    clientProfile = ClientProfile()
    clientProfile.httpProfile = httpProfile

    # The client is created for the "ap-guangzhou" region.
    client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

except TencentCloudSDKException as err:
    # Best-effort setup: report the SDK error and leave `client` as None.
    print(err)


def generalBasicOCR(image_path='ocr\\table.png'):
    """Run general printed-text OCR on an image and print the detected text.

    General text recognition -> general printed-text recognition: recognizes
    the text of a whole image in many scenes and arbitrary layouts. The
    language can be detected automatically, or chosen explicitly
    (recommended).
    Use cases include: printed documents, web images, advertisement images,
    street-view shop signs, menus, video titles, avatar text, and so on.
    Default request rate limit of the interface: 20 requests per second.

    Args:
        image_path: Path handed to ``encode_base64``; defaults to the sample
            image path that used to be hard-coded here.
            NOTE(review): ``encode_base64`` is defined in ``base_model``;
            presumably it returns the file content as base64 text - confirm.

    Returns:
        None. The recognized text is printed by ``get_result``.
    """
    req = models.GeneralBasicOCRRequest()
    # The request is filled from a JSON string. An alternative payload is
    # '{"ImageUrl":"sg","LanguageType":"zh"}'.
    params = '{"ImageBase64":"%s"}' % (encode_base64(image_path))
    req.from_json_string(params)

    resp = client.GeneralBasicOCR(req)
    get_result(resp)


def tableOCR(image_path='ocr\\table.png'):
    """Run table OCR on an image and print the detected text.

    Industry document recognition -> table recognition.

    Args:
        image_path: Path handed to ``encode_base64``; defaults to the sample
            image path that used to be hard-coded here.
            NOTE(review): ``encode_base64`` is defined in ``base_model``;
            presumably it returns the file content as base64 text - confirm.

    Returns:
        None. The recognized text is printed by ``get_result``.
    """
    req = models.TableOCRRequest()
    # The request is filled from a JSON string carrying the encoded image.
    params = '{"ImageBase64":"%s"}' % (encode_base64(image_path))
    req.from_json_string(params)

    resp = client.TableOCR(req)
    get_result(resp)


def get_result(resp):
    """Print every detected text entry of an OCR response, one per line.

    Args:
        resp: Response object exposing ``to_json_string()``, whose JSON
            document holds the detections under ``"TextDetections"``.

    Returns:
        None. Each detected text is written to stdout.

    Raises:
        KeyError: If a detection item has neither a ``"DetectedText"`` nor a
            ``"Text"`` key.
    """
    # Parse with json.loads rather than eval(): the payload is JSON, so
    # literals such as true/false/null are not valid Python names, and eval()
    # on data received from a remote service is unsafe.
    payload = json.loads(resp.to_json_string())

    # A missing or null detection list is treated as "nothing recognized".
    detections = payload.get('TextDetections') or []
    for item in detections:
        if 'DetectedText' in item:
            print(item['DetectedText'])
        else:
            # NOTE(review): table-recognition items appear to carry the cell
            # content under "Text" instead of "DetectedText" - confirm
            # against the TableOCR API reference.
            print(item['Text'])


if __name__ == '__main__':
    # Script entry point: run one of the OCR demos when executed directly.
    #generalBasicOCR()     # general printed-text recognition
    tableOCR()            # table recognition